
// Close any log left open by a previous (possibly aborted) run; "capture"
// suppresses the error raised when no log is open. Then start a fresh log.
capture log close
log using "$logs/cr-synth-county.log", replace

// Prepare synthetic data

// Import the seven race-specific ACS 2015 5-year B01001 tables and merge them
// into one wide file keyed on id / id2 / geography.
//
//   table suffix : A     B     C      D     E         F     G
//   race prefix  : wh    bl    na     as    pi        ot    mu
//   meaning      : White Black Native Asian PI/Hawaii Other Multi
//
// Every table gets the same treatment:
//   1. drop the margin-of-error columns (margin*);
//   2. strip the leading "estimate" from the remaining variable names;
//   3. turn the leading "male"/"female" into "m_"/"f_";
//   4. strip the trailing "years" / "yearsandover" from the age-band names;
//   5. the bare sex totals (left as "m_" and "f_") become m_total / f_total;
//   6. drop the both-sexes total;
//   7. prefix every m_* / f_* variable with the two-letter race code.
//
// NOTE: renvars is a user-written command (not shipped with Stata) and must
// be installed before running this file.

local racecodes wh bl na as pi ot mu
local tableids  A  B  C  D  E  F  G

forvalues i = 1/7 {
	local r : word `i' of `racecodes'
	local t : word `i' of `tableids'

	import delimited "$raw/acs5yr/ACS_15_5YR_B01001`t'_with_ann.csv", varnames(2) clear
	drop margin*
	renvars estimate*, presub(estimate )
	// male* must be renamed before female* is touched: "female..." does not
	// start with "male", so the two patterns never collide.
	renvars male*, presub(male m_)
	renvars *years, postsub(years )
	renvars *yearsandover, postsub(yearsandover )
	renvars female*, presub(female f_)
	rename m_ m_total
	rename f_ f_total
	drop total
	renvars m_* f_*, prefix(`r'_)

	// The last table (mu) stays in memory and serves as the merge master;
	// every other table is parked in a tempfile named after its race code.
	if "`r'" != "mu" {
		tempfile `r'
		save ``r''
	}
}

// Every table must contain exactly the same set of geographies: assert(3)
// aborts the run if any observation fails to match.
foreach r in wh bl na as pi ot {
	merge 1:1 id id2 geography using ``r'', assert(3) nogen
}

// State FIPS code: characters 10-11 of the string id, converted to numeric.
generate fips = substr(id, 10, 2)
destring fips, replace

// Build a state -> census division lookup from Stata Press's educ3 example
// dataset (downloaded over HTTP, so this step needs network access).
preserve
	use http://www.stata-press.com/data/r13/educ3, clear
	keep state division
	duplicates drop
	rename state fips
	drop if missing(division)
	tempfile div
	save `div'
restore

// Attach the division to every county. The _merge indicator is not used
// afterwards, so it is never created.
merge m:1 fips using `div', nogenerate

// Two state codes get no division from the lookup and are filled in by hand.
replace division = 8 if fips == 56
replace division = 5 if fips == 11
assert !missing(division)

// Discard all value labels (e.g. those that came along with the lookup data).
label drop _all

// Safety net: no variable may be missing in any row. This also catches any
// lookup-only rows the merge might have appended.
egen n_missing = rowmiss(*)
assert n_missing == 0
drop n_missing

order fips division, after(id2)

// Collapse the detailed age bands into five broader groups, separately for
// every race prefix and sex:
//   under18 = under5 + 5to9 + 10to14 + 15to17
//   18to29  = 18and19 + 20to24 + 25to29
//   30to44  = 30to34 + 35to44
//   45to64  = 45to54 + 55to64
//   over65  = 65to74 + 75to84 + 85
foreach race in wh bl na as pi ot mu {
	foreach sex in m f {
		local p `race'_`sex'_
		egen `p'under18 = rowtotal(`p'under5 `p'5to9 `p'10to14 `p'15to17)
		egen `p'18to29  = rowtotal(`p'18and19 `p'20to24 `p'25to29)
		egen `p'30to44  = rowtotal(`p'30to34 `p'35to44)
		egen `p'45to64  = rowtotal(`p'45to54 `p'55to64)
		egen `p'over65  = rowtotal(`p'65to74 `p'75to84 `p'85)
	}
}

// Keep the identifiers plus the sex totals and the five collapsed age groups.
// The identifiers are listed explicitly so this step does not depend on the
// current column order of the dataset.
keep id id2 fips division geography *total *under18 *18to29 *30to44 *45to64 *over65

// Pool Native, Asian, PI/Hawaii, Other and Multi into a single "oth" group.
foreach grp in total under18 18to29 30to44 45to64 over65 {
	foreach sex in m f {
		egen oth_`sex'_`grp' = rowtotal(na_`sex'_`grp' as_`sex'_`grp' pi_`sex'_`grp' ot_`sex'_`grp' mu_`sex'_`grp')
	}
}
// Use the full "pi_" prefix (not "pi*") so only the PI/Hawaii columns match.
drop na_* as_* pi_* ot_* mu_*

// County total population = sum of the remaining sex totals (wh, bl, oth).
egen totpop = rowtotal(*_total)
drop *_total

// id2 becomes the county identifier; the long string id is no longer needed.
drop id
rename id2 county_fips
	
// Go from one row per county (wide: race x sex x age columns) to one row per
// county x age x gender x race, in three successive reshapes. Each reshape
// peels one dimension off the variable names; the renvars calls in between
// rewrite the names so that the next dimension becomes the reshape suffix.

// Step 1: the age group (the text after e.g. "wh_m_") moves into string variable "age".
reshape long wh_m_ wh_f_ bl_m_ bl_f_ oth_m_ oth_f_, i(county_fips fips division geography totpop) j(age) string
// Strip the trailing underscore (wh_m_ -> wh_m) so the sex letter is the suffix.
renvars *_, postsub(_ )
// Step 2: the sex letter (m / f) moves into string variable "gender".
reshape long wh_ bl_ oth_, i(county_fips fips division geography totpop age) j(gender) string
// Rename wh_ -> popwh_ -> popwh (same for bl, oth) so the race code is the suffix of stub "pop".
renvars *_, prefix(pop)
renvars pop*, postsub(_ )
// Step 3: the race code (wh / bl / oth) moves into string variable "race".
reshape long pop, i(county_fips fips division geography totpop age gender) j(race) string

// The result must be uniquely identified by county x age x gender x race.
isid county_fips age gender race
// Share of the county's total population falling in this cell.
gen prop = pop/totpop


	// Quick eyeball checks of the cell shares along each dimension.
	// Each bysort re-sorts the data, so the order of these three statements
	// determines the sort order left behind (by race).
	bysort gender: summarize prop
	bysort age: summarize prop
	bysort race: summarize pop prop
		
		// MC: Double-checked against data.census.gov counts for Marion County, IN; Suffolk County, MA; Delaware County, PA; and Monmouth County, NJ -- all match.

		/*. tab age

			age |      Freq.     Percent        Cum.
	------------+-----------------------------------
		 18to29 |     18,642       20.00       20.00
		 30to44 |     18,642       20.00       40.00
		 45to64 |     18,642       20.00       60.00
		 over65 |     18,642       20.00       80.00
		under18 |     18,642       20.00      100.00
	------------+-----------------------------------
		  Total |     93,210      100.00

	. tab gender

		 gender |      Freq.     Percent        Cum.
	------------+-----------------------------------
			  f |     46,605       50.00       50.00
			  m |     46,605       50.00      100.00
	------------+-----------------------------------
		  Total |     93,210      100.00

	. tab race

		   race |      Freq.     Percent        Cum.
	------------+-----------------------------------
			 bl |     31,070       33.33       33.33
			oth |     31,070       33.33       66.67
			 wh |     31,070       33.33      100.00
	------------+-----------------------------------
		  Total |     93,210      100.00

		*/
		
	// Final variable names: gender -> sex, county_fips -> countyid.
	rename (gender county_fips) (sex countyid)

// Write in the older version(12) .dta format.
saveold "$data/synth_county.dta", replace version(12) // saveold for R

log close

